# Tally complaints for every (company response, year) pair and list the
# largest counts first. Grouping followed by summarise() keeps only the two
# grouping columns plus the count, so no explicit column selection is needed.
df %>%
  group_by(`Company response to consumer`, year) %>%
  summarise(number_of_complaints = n()) %>%
  arrange(desc(number_of_complaints))
## # A tibble: 54 x 3
## # Groups: Company response to consumer [9]
## `Company response to consumer` year number_of_complaints
## <chr> <int> <int>
## 1 Closed with explanation 2019 220449
## 2 Closed with explanation 2018 209346
## 3 Closed with explanation 2017 202692
## 4 Closed with explanation 2016 151698
## 5 Closed with explanation 2015 131018
## 6 Closed with explanation 2014 118186
## 7 Closed with explanation 2013 82032
## 8 Closed with non-monetary relief 2019 41792
## 9 Closed with explanation 2012 36116
## 10 Closed with non-monetary relief 2018 34600
## # … with 44 more rows
There is no seasonal or periodic pattern behind these complaints.
#' Build the interactive daily complaint trend for one company.
#'
#' Shared implementation behind `year_tend()` and `year_tend_2()`. Reads the
#' global data frame `df` and uses its `year`, `month`, `day`, `Company` and
#' `Company response to consumer` columns.
#'
#' @param company_name Company name, matched exactly against `df$Company`.
#' @param target_year Year to plot (compared against `df$year`).
#' @param log_scale If `TRUE`, the y axis is drawn on a log10 scale.
#' @return The object returned by `ggplotly()` for the line chart.
build_year_trend_plot <- function(company_name, target_year = 2019,
                                  log_scale = TRUE) {
  # paste() already separates its arguments with a space, so the pieces must
  # not carry their own leading/trailing blanks.
  title <- paste("Complaints of", company_name, "throughout", target_year)

  daily_counts <- df %>%
    filter(year == target_year, Company == company_name) %>%
    # Include the year when parsing: a bare "%m.%d" format silently takes the
    # year in which the code is run. UTC matches the default timezone of
    # scales::date_format(), so month labels cannot shift by a day.
    mutate(date = as.POSIXct(paste(year, month, day, sep = "."),
                             format = "%Y.%m.%d", tz = "UTC")) %>%
    group_by(date, `Company response to consumer`) %>%
    summarise(number_of_complaints = n()) %>%
    ungroup()

  trend <- ggplot(daily_counts,
                  aes(x = date, y = number_of_complaints,
                      color = `Company response to consumer`)) +
    geom_line() +
    ylab("Number of complaints") +
    theme_tufte() +
    scale_x_datetime(labels = date_format("%b"),
                     date_breaks = "1 month") +
    theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
          legend.text = element_text(size = 8),
          legend.title = element_text(size = 8),
          axis.text.x = element_text(angle = 45)) +
    ggtitle(title)

  if (log_scale) {
    trend <- trend + scale_y_log10()
  }

  ggplotly(trend)
}

#' Daily complaints of a company by company response, log10 y axis.
#'
#' @param company_name Company name, matched exactly against `df$Company`.
#' @param target_year Year to plot; defaults to 2019.
#' @return An interactive plotly line chart.
year_tend <- function(company_name, target_year = 2019) {
  build_year_trend_plot(company_name, target_year = target_year,
                        log_scale = TRUE)
}

year_tend("EQUIFAX, INC.")

#' Daily complaints of a company by company response, linear y axis.
#'
#' @param company_name Company name, matched exactly against `df$Company`.
#' @param target_year Year to plot; defaults to 2019.
#' @return An interactive plotly line chart.
year_tend_2 <- function(company_name, target_year = 2019) {
  build_year_trend_plot(company_name, target_year = target_year,
                        log_scale = FALSE)
}

year_tend_2("BANK OF AMERICA, NATIONAL ASSOCIATION")
#' Interactive table of the 100 largest complaint groups for one company.
#'
#' Reads the global data frame `df`, counts complaints per
#' (date, product, state, submission channel, company response) and renders
#' the 100 largest groups as a `datatable` widget.
#'
#' @param company_name Company name, matched exactly against `df$Company`.
#' @return The widget created by `datatable()`.
DT <- function(company_name) {
  df_new <- df %>%
    filter(Company == company_name) %>%
    # The data is not restricted to one year here, so the year has to be part
    # of the date; parsing only "%m.%d" would merge the same calendar day of
    # different years into one group (and stamp it with the current year).
    mutate(date = as.Date(paste(year, month, day, sep = "."),
                          format = "%Y.%m.%d")) %>%
    group_by(date, Product, State,
             `Submitted via`, `Company response to consumer`) %>%
    summarise(number_of_complaints = n()) %>%
    ungroup() %>%
    arrange(desc(number_of_complaints)) %>%
    head(100)

  # Column names use underscores (e.g. number_of_complaints) as well as dots,
  # so both are turned into spaces before title-casing.
  pretty_headers <- str_to_title(gsub("[._]", " ", colnames(df_new)))

  title <- paste0(
    "Table 1: This is a simple data table for the complaints of ",
    company_name, "."
  )

  datatable(
    df_new,
    caption = title,
    rownames = FALSE,
    class = "cell-border stripe",
    colnames = pretty_headers,
    filter = list(position = "top"),
    options = list(
      dom = "Bfrtip",
      buttons = I("colvis"),
      language = list(sSearch = "Filter:")
    ),
    extensions = c("Buttons", "Responsive")
  )
}

DT("EQUIFAX, INC.")
# ---- Modelling: was a Bank of America complaint closed with monetary relief? ----

# Build the modelling table. The three text columns are reduced to 0/1
# indicators; the response becomes a two-level factor whose first level
# ("withMonetaryRelief") is the one caret treats as the positive class.
df_ml <- df_raw %>%
  filter(Company == "BANK OF AMERICA, NATIONAL ASSOCIATION") %>%
  select(Product, State, `Company response to consumer`,
         `Company public response`,
         `Consumer complaint narrative`, `Consumer consent provided?`) %>%
  mutate(
    `Company response to consumer` = ifelse(
      `Company response to consumer` == "Closed with monetary relief", 1, 0
    ),
    `Consumer consent provided?` = ifelse(
      `Consumer consent provided?` == "Consent provided", 1, 0
    ),
    # 1 when a value is present, 0 when it is missing (real NA or the
    # literal string "NA").
    `Consumer complaint narrative` = as.numeric(
      !is.na(`Consumer complaint narrative`) &
        `Consumer complaint narrative` != "NA"
    ),
    `Company public response` = as.numeric(
      !is.na(`Company public response`) &
        `Company public response` != "NA"
    )
  ) %>%
  # Rows with an unknown response, consent status or product cannot be used
  # for training or evaluation.
  filter(
    !is.na(`Company response to consumer`),
    !is.na(`Consumer consent provided?`),
    !is.na(Product)
  ) %>%
  mutate(
    # Convert once, before splitting, so training and testing share the same
    # factor levels.
    Product = factor(Product),
    `Company response to consumer` = factor(
      `Company response to consumer`,
      levels = 1:0,
      labels = c("withMonetaryRelief", "noMonetaryRelief")
    )
  )

response_col <- "Company response to consumer"
# The response itself must not appear among the predictors (target leakage).
# State is kept in df_ml but is not used as a predictor.
predictor_cols <- c("Product", "Company public response",
                    "Consumer complaint narrative",
                    "Consumer consent provided?")

set.seed(12345)
in_train <- createDataPartition(y = df_ml[[response_col]],
                                p = 0.8, list = FALSE)
training <- df_ml[ in_train, ]
testing <- df_ml[-in_train, ]

# ---- Random forest ----
# The x/y interface is used because the formula interface of randomForest
# does not cope with column names that contain spaces or "?".
rf <- randomForest(x = as.data.frame(training[predictor_cols]),
                   y = training[[response_col]],
                   importance = TRUE)
y_hat_rf <- predict(rf, newdata = as.data.frame(testing[predictor_cols]),
                    type = "response")
confusionMatrix(y_hat_rf, reference = testing[[response_col]])

# ---- Linear discriminant analysis ----
# "zv" drops dummy columns that are constant in the training split (e.g. a
# product that only occurs in the test rows) before centering and scaling.
LDA <- train(`Company response to consumer` ~
               Product +
               `Company public response` +
               `Consumer complaint narrative` +
               `Consumer consent provided?`,
             data = training, method = "lda",
             preProcess = c("zv", "center", "scale"))
z_LDA <- predict(LDA, newdata = testing)
confusionMatrix(z_LDA, reference = testing[[response_col]])